\documentclass[11pt,a4paper,notitlepage]{article}

\usepackage{setspace}
\usepackage{fullpage}

\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage[noend]{algorithmic}
\usepackage{url}
\usepackage{hyperref}

\newcommand{\xor}{\oplus}
\newcommand{\bigxor}{\bigoplus}
\newcommand{\bitset}{\{0,1\}}
\newcommand{\ip}[1]{\langle #1 \rangle}

\newcommand{\from}{\leftarrow}
\newcommand{\dist}{\sim}
\newcommand{\ul}{\underline}
\newcommand{\ol}{\overline}

\newcommand{\RR}{\mathbb{R}}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\F}{\mathbb{F}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\C}{\mathbb{C}}
\newcommand{\NP}{\mathcal{NP}}
\newcommand{\even}{{\rm{even}}}
\newcommand{\odd}{{\rm{odd}}}
\renewcommand{\neg}{{\rm neg}}
\newcommand{\cond}{\Big\vert}
\newcommand{\etal}{\textit{et~al.}}

\newcommand{\abs}[1]{\left| #1 \right|}
\newcommand{\vectornorm}[1]{\left|\left|#1\right|\right|}
\newcommand{\eqdef}{\stackrel{\rm def}{=}}
\newcommand{\poly}{{\rm poly}}
\newtheorem{thm}{Theorem}
\newtheorem{claim}[thm]{Claim}
\newtheorem{construction}[thm]{Construction}
\newtheorem{conjecture}[thm]{Conjecture}
\newtheorem{assumption}[thm]{Assumption}
\newtheorem{corr}[thm]{Corollary}
\newtheorem{dfn}[thm]{Definition}
\newtheorem{prop}[thm]{Proposition}
\newtheorem{rem}[thm]{Remark}
\newtheorem{lemma}[thm]{Lemma}

\newcommand{\rnote}[1]{{(\sf Ron's Note:} {\sl{#1}} {\sf )}}
\newcommand{\gnote}[1]{{(\sf Guy's Note:} {\sl{#1}} {\sf )}}

\DeclareMathOperator*{\prob}{{\rm Pr}}
\DeclareMathOperator*{\E}{{\mathbf E}}


\DeclareMathOperator*{\Var}{{\mathbf Var}}


\newcommand{\round}[1]{\lfloor #1 \rceil}
\newcommand{\rounddown}[1]{\lfloor #1 \rfloor}
\newcommand{\roundup}[1]{\lceil #1 \rceil}

\begin{document}
\title{Pseudorandomness - Exercise 1}
\author{Guy Katz \and Ron Rothblum}

\maketitle

\section*{Question 1}

\paragraph{(a)}

Let $\beta_1,\dots,\beta_t$ be the distinct elements in the input
stream $a_1,\ldots,a_k$. Then $X=\sum_{i=1}^k
(-1)^{Z_{a_i}}=\sum_{i=1}^t m_i \cdot (-1)^{Z_{\beta_i}}$ and we have:

\begin{align*}
  \E\left[X^2\right] &= \E\left[ \left( \sum^t_{i=1} m_i\cdot
      (-1)^{Z_{\beta_i}} \right) \cdot \left( \sum^t_{j=1} m_j\cdot
      (-1)^{Z_{\beta_j}} \right)\right] \\
  &= \E\left[ \left(\sum^t_{i=1} m^2_i\cdot (-1)^{2\cdot Z_{\beta_i}}
    \right) + \left( \sum_{i \neq j} m_im_j\cdot
      (-1)^{Z_{\beta_i}}(-1)^{Z_{\beta_j}} \right)\right] \\
  &\stackrel{1}{=} \sum^t_{i=1} m^2_i + \sum_{i \neq j} m_im_j \cdot
  \E\left[ (-1)^{Z_{\beta_i}}(-1)^{Z_{\beta_j}} \right] \\
  &\stackrel{2}{=} \sum^t_{i=1} m^2_i + \sum_{i \neq j} m_im_j \cdot
  \E[(-1)^{Z_{\beta_i}}] \cdot
  \E[(-1)^{Z_{\beta_j}}] \\
  &\stackrel{3}{=} \sum^t_{i=1} m^2_i \\
  &= f_2
\end{align*}
\noindent
where (1) follows from linearity of expectation, (2) and (3) from the fact
that the $Z_{a_i}$'s are 4-wise independent (and in particular
pairwise independent and unbiased).

\paragraph{(b)}
\begin{align*}
  \Var(X^2) &= \E[X^4] - \left( \E[X^2] \right)^2 \\
  &= \E \left[ \left( \sum_i m_i\cdot (-1)^{Z_{a_i}} \right) \cdot
    \left( \sum_j m_j\cdot (-1)^{Z_{a_j}} \right) \cdot \left( \sum_r
      m_r\cdot (-1)^{Z_{a_r}} \right) \cdot \left( \sum_s m_s\cdot
      (-1)^{Z_{a_s}} \right) \right] -
  f_2^2 \\
  &\stackrel{*}{\leq} \E \left[\frac{1}{2}\binom{4}{2} \left(
      \sum_i \sum_j m^2_im^2_j\cdot (-1)^{2\cdot Z_{a_i} +
        2\cdot
        Z_{a_j}} \right) \right] - f_2^2 \\
  &= 3\sum_i m^2_i\sum_j m^2_j -f_2^2 \\
  &= 2f_2^2
\end{align*}

\noindent
$*$ - similar to \textbf{(a)}, this time using their 4-wise independence. The only
combinations that don't cancel out are those where there are two pairs of
equal indices. There are $\frac{1}{2}\binom{4}{2} = 3$ different
partitions of 4 variables into 2 pairs. The case $i=j=r=s$ is counted
more than once, so we get a $\leq$ relation.

\paragraph{(c)}
\noindent
Observe that $\E[Y]=f_2$ and that $Y$ is the sum of $t$ independent
random variables distributed in the range $[-k,k]$. Thus, by
Hoeffding's inequality we have:

\[
\prob \left[ |Y-f_2| > \frac{f_2}{100} \right] \leq 2 e^{-\frac{2 f_2^2 \cdot
  t^2}{100^2 \cdot t \cdot 4k^2 }} \leq 2e^{-\frac{t}{20000}}
\]

where the last inequality follows from the fact that (using
Cauchy--Schwarz) $f_2=\sum m_a^2 \geq \frac{(\sum m_a)^2}{k} = k$.
Thus, by setting $t > 20000 \cdot \ln 200$ we obtain probability less than
$1\%$ to deviate from $f_2$ (by more than $1\%$).

\paragraph{(d)}
The seed length for each $X^{(i)}$ is $4\log N$ so in total we need
$4t \log N$ bits. In addition, for the $t$ counters (each in $[-k,k]$)
we require an additional $O(t\log k)$ bits. In total $O \left((\log N +
  \log k)t \right)$.

Using the $4$-wise $\epsilon$-biased generator will not significantly
help in this situation since the amount of independence required is
just a (small) constant and therefore the change will only affect the
constant (hidden by the big-O notation).

\section*{Question 2}
Let $S \subseteq \{\pm 1\}^n$ be an $\epsilon$-biased set where
$t=|S|$. Consider the following function/probability distribution $f :
\{\pm 1\}^n \to [0,1]$:
\[
f(x) = \begin{cases} \frac1{t} & \text{if } x \in S \\ 0 &
  \text{otherwise} \end{cases}
\]

\begin{claim}
  For every $\emptyset \neq S \subseteq [n]$, it holds that $|
  \hat{f}(S) | \leq \frac{\epsilon}{2^n}$.
\end{claim}
\begin{proof}
  \begin{align*}
    \left| \hat{f}(S) \right| &= \left| \E_x[\chi_S(x)f(x)] \right| \\
    &= \frac1{2^n}\left| \sum_{x}[\chi_S(x)f(x)] \right| \\
    &= \frac1{2^n} \left| \prob_{x \dist f}[\chi_S(x)=1] - \prob_{x \dist f}[\chi_S(x)=-1] \right| \\
    &= \frac{bias_f(S)}{2^n} \\
    &\leq \frac{\epsilon}{2^n}
  \end{align*}
\end{proof}

\noindent By Parseval, it holds that $\E[f^2]=\sum_S \hat{f}^2(S)$. In
our case $\E[f^2] = \frac{t}{2^n} \cdot \frac1{t^2} = \frac1{t 2^n}$ and:
\[
\sum_S \hat{f}^2(S) = \hat{f}^2(\emptyset) + \sum_{S \neq
  \emptyset}\hat{f}^2(S) \leq \frac1{2^{2n}} + 2^n \frac{\epsilon^2}{2^{2n}}
\]
and therefore we have:
\[
\frac1{t} \leq {2^{-n}} + \epsilon^2
\]
which implies the following lower bound on the set size $t$:
\[
t \geq \frac1{2^{-n} + \epsilon^2}
\]

\section*{Question 3}
\paragraph{(a)} The $k$-wise independent generator is defined as
$K(a_0,\dots,a_{k-1})=(\sum_{i = 0}^{k-1}a_ix^i)_{x \in \F}$. This
transformation can alternatively be written as:
\begin{align*}
  K(a_0,\dots,a_{k-1})=
  \begin{bmatrix}
    1 & x_1 & x_1^2 & \ldots & x_1^{k-1} \\
    1 & x_2 & x_2^2 & \ldots & x_2^{k-1} \\
    & & \vdots & \\
  \end{bmatrix} \cdot
  (a_0,\dots,a_{k-1})^T
\end{align*}
where $\F=\{x_1,x_2,\dots\}$. Therefore $K$ is indeed a linear
transformation.

\paragraph{(b)}
Because $K$ is linear, there exists a matrix $A \in \F_2^{n \times m}$
such that $K(y)=A y$.

\begin{claim}
  $K(B(x))$ is a $k$-wise $2^{-k}\epsilon$-biased generator (i.e.,
  every set of at most $k$ output variables fools any \emph{linear}
  test up to a $2^{-k}\epsilon$ factor).
\end{claim}
\begin{proof}
  Let $X \dist U_s$ be a uniformly random seed and let $Y=B(X)$ and
  $Z=K(Y)=A \cdot Y$. Consider a sequence of $k$ distinct indices $1 \leq
  i_1 < \ldots < i_k \leq n$ of $Z$. We show that any linear test on
  these indices is fooled up to bias $2^{-k}\epsilon$.

  Let $\emptyset \neq S \subseteq \{1,\dots,k\}$ specify such a linear
  test. Note that:
  \[
  \sum_{j \in S} Z_j = \sum_{j \in S}\ip{A_{i_j},Y} = \ip{\sum_{j \in
      S}A_{i_j},Y}
  \]
  where $A_{i_j}$ denotes the $i_j$-th row of $A$ and $Z_j$ denotes the
  $j$-th bit of $Z$.  Observe that the $A_{i_j}$'s are linearly
  independent\footnote{If any $k$-rows of $A$ are linearly dependent
    then the distribution induced by $A$ is not $k$-wise independent
    (i.e., any k-1 of the dependent variables determine the last
    one).}  and therefore $\sum_{j \in S}A_{i_j}$ is not the all zeros
  vector. Thus, $S$ specifies a linear test on $Y$ and is fooled up to
  bias $2^{-k}\epsilon$.
\end{proof}

Using the foregoing claim and the Parity Lemma, $K(B(x))$ is a
$k$-wise $\epsilon'$-dependent generator for
$\epsilon'=2^{-\frac{k}{2}} \epsilon < \epsilon$.

\paragraph{(c)}
Using the construction of the $k$-wise independent set and
$\epsilon$-biased set from class we can obtain $m = O(k \log n)$ and
$s=O(\log m + \log \left( \frac{2^k}{\epsilon}\right)) = O(\log \log n
+ k + \log \frac1{\epsilon})$, as required.

\section*{Question 4}
\paragraph{(a)} Consider the random graph $G_{n,\frac1{2}}$ on $n$
vertices obtained by sampling each edge independently with probability
$\frac1{2}$.

Let $v_1,\dots,v_k$ be an arbitrary set of $k$ (distinct)
vertices. These vertices form a clique if all edges between them
exist, and they form an independent set if no edge between them
exists. Since these events are disjoint, the probability that the
vertices form neither a clique nor an independent set is:
\begin{align*}
  \Pr[\text{neither clique nor independent set}] &= 1 -
  \Pr[\text{clique}] - \Pr[\text{independent set}] \\ &= 1 -
  2^{-\binom{k}{2}} - 2^{-\binom{k}{2}} = 1 - 2^{-\binom{k}{2}+1}
\end{align*}
A graph with $n$ vertices is not a Ramsey Graph if there exists a subset of $k$
vertices forming either a clique or an independent set. By the
union bound,
\begin{align*}
  \Pr[G_{n,\frac{1}{2}} \text{ isn't a Ramsey Graph}] &\leq
  \binom{n}{k}(1-(1-2^{-\binom{k}{2}+1})) \\ &=
  \binom{n}{k}(2^{-\binom{k}{2}+1})
\end{align*}
Given a $k$, we want to show that there exists an $n=2^{\Omega(k)}$ for
which the above probability is smaller than 1. This is equivalent to
stipulating that
\[
  \binom{n}{k} < 2^{\binom{k}{2}-1}.
\]
\noindent First we use the fact that
\[
\binom{n}{k} \leq \left( \frac{n \cdot e}{k} \right)^k
\]
\noindent and therefore it suffices to find values of $n$ for which
\[
\left( \frac{n \cdot e}{k} \right)^k < 2^{\frac{k(k-1)}{2}-1}.
\]
\noindent or,
\[
n < \frac{k}{e} \cdot 2^{\frac{(k-1)}{2}-\frac1{k}}.
\]
And so, by choosing $n=2^{\frac{(k-1)}{2}-\frac1{k}}=2^{\Omega(k)}$ we
get that a random graph is an $(n,k)$-Ramsey Graph with non-zero
probability, and therefore such a graph must exist.

\paragraph{(b)}
Since the sample space is $\binom{k}{2}$-wise $\epsilon$-dependent,
for a given set of vertices $v_1,\dots,v_k$, the probability that they
form either a clique or an independent set is at most
$2^{-\binom{k}{2}+1} + \epsilon$. Thus, by the union bound, the
probability that $G$ contains either a clique or an independent set is
at most:
\begin{align*}
  \Pr[G \text{ isn't a Ramsey Graph}] &\leq
  \binom{n}{k} \left( 2^{-\binom{k}{2}+1} + \epsilon \right) \\
  &\leq \left( \frac{n e}{k} \right)^k \left(
    2^{-\frac{k^2}{2}+\frac{k}{2}+1} + \epsilon \right)
\end{align*}
\noindent Using $k=2 \log n$ and $\epsilon=2^{-k^2}$ we get:
\begin{align*}
  \Pr[G \text{ isn't a Ramsey Graph}] &\leq \left( \frac{n e}{2 \log
      n} \right)^{2 \log n} \cdot \left( 2^{- 2 \log^2 n + \log n+1} +
    2^{-4 \log^2 n}
  \right) \\
  &\leq \left( \frac{n e}{2 \log n} \right)^{2
    \log n} \cdot 4n \left( 2^{-2\log^2 n} \right) \\
  &\leq \left( \frac{2^{2\log^2 n} \cdot n^c}{n^{2 \log \log n}}
  \right) \cdot 4n \left (2^{-2\log^2
      n} \right) \\
  &= \frac{4 n^{c+1}}{n^{2 \log \log n}}
\end{align*}
which is smaller than $\frac1{ \log n}$ since $4 n^{c+1} \log n$ is smaller
than $n^{2 \log \log n}$ (for sufficiently large $n$).


% \begin{align*}
%   \Pr[G \text{ isn't a Ramsey Graph}] &\leq
%   \binom{n}{k}(2^{-\binom{k}{2}+1} + 2^{\frac{1}{2}\cdot
%     \binom{k}{2}}\epsilon) \\ &=
%   \binom{n}{k}(2^{-\frac{k(k-1)}{2}+1} +
%   2^{\frac{k(k-1)}{4}}\cdot 2^{-k^2}) \\ &=
%   \binom{n}{k}(2^{-\frac{1}{2}k^2 +\frac{1}{2}k +1} + 2^{-\frac{3}{4}k^2-\frac{1}{4}k}) \\ &\leq
%   (\frac{ne}{k})^k\cdot(2^{-\frac{1}{2}k^2 +\frac{1}{2}k +1} + 2^{-\frac{3}{4}k^2-\frac{1}{4}k}) \\ &=
%   (\frac{ne}{2\log n})^{2\log n}\cdot(2^{-\frac{1}{2}(2\log n)^2
%     +\frac{1}{2}(2\log n) +1} + 2^{-\frac{3}{4}(2\log
%     n)^2-\frac{1}{4}(2\log n)}) \\ &=
%   (\frac{ne}{2\log n})^{2\log n}\cdot(2^{-2\log^2 n +\log n +1} +
%   2^{-3\log^2 n-\frac{1}{2}\log n}) \\ &=
%   (\frac{ne}{2\log n})^{2\log n}\cdot(2n\cdot (2^{\log n})^{-2\log n} +
%   \frac{1}{\sqrt{n}}(2^{\log n})^{-3\log n}) \\ &=
%   (\frac{ne}{2\log n})^{2\log n}\cdot(2n\cdot (n)^{-2\log n} +
%   \frac{1}{\sqrt{n}}(n)^{-3\log n}) \\ &=
%   (\frac{e}{2\log n})^{2\log n}\cdot(2n +
%   \frac{1}{\sqrt{n}\cdot n^{\log n}}) \\ &=
%   \frac{1}{n^2}(\frac{e}{\log n})^{2\log n}\cdot(2n +
% \frac{1}{\sqrt{n}\cdot n^{\log n}}) \\ &=
%   \frac{2}{n}(\frac{e}{\log n})^{2\log n} + \frac{1}{\sqrt{n}\cdot
%     n^2\cdot n^{\log n}} (\frac{e}{\log n})^{2\log n}
%  \\ &\leq
%   \frac{1}{\log n}
% \end{align*}
% Where the last transition clearly holds for sufficiently large values
% of $n$.

\paragraph{(c)}
We show that $G_1 \times G_2$ does not contain a $k_1 k_2$ clique. The
proof that it does not contain a $k_1 k_2$ independent set is
analogous.

Suppose toward a contradiction that $G_1 \times G_2$ contains a $k_1
k_2$-clique composed of the following vertices
$(v_1,u_1),\dots,(v_{k_1 k_2},u_{k_1 k_2})$. Consider the set
$S=\{v_1,\dots,v_{k_1k_2}\}$. There are two cases to consider:

\begin{itemize}
\item $|S| \geq k_1$: In this case $G_1$ contains a $k_1$-clique which is a
  contradiction.
\item $|S| < k_1$: In this case we consider the partitioning of $V$
  according to the first coordinate. Since there are less than $k_1$
  partitions and the total set size is $k_1 k_2$, there must exist a
  partition of size greater than $k_2$. This partition contains a
  $k_2$-clique of $G_2$ which is also a contradiction.
\end{itemize}

\paragraph{(d)}
Let $D$ be the set of all graphs with $n$ nodes, constructed from a
$\binom{k}{2}$-wise $\epsilon$-dependent sample space as described in
section (b). Each graph corresponds to a point in the sample space;
therefore, the number of graphs is the number of possible seeds (using
the construction from (3b)):
\[
|D|=2^{\binom{k}{2}+\log\log \binom{n}{2}+\log \frac1{\epsilon}}
\]
and therefore there exist constants $c_1$ and $c_2$ such that:
\[
2^{c_1 \cdot k^2} \leq |D| \leq 2^{c_2 \cdot k^2}.
\]
We construct the product graph $G$ from all the graphs in $D$. This
graph $G$ has $n^{|D|}$ vertices.  We know from (b) that at most a
$\frac1{\log n}$ fraction of the graphs in $D$ are not a $(n,2\log
n)$-Ramsey graph. Therefore, the total number of graphs in $D$ that
are not such Ramsey graphs is at most $\frac{|D|}{\log n}$. By (4c),
the resulting graph $G$ is $(N,t)$ where:
\begin{align*}
  t &= k^{(1-\frac{1}{\log n})\cdot |D|}\cdot n^{\frac{1}{\log
      n}\cdot |D|} \\
  N &= n^{|D|}
\end{align*}
This is so because the $\frac{|D|}{\log n}$ ``bad'' graphs can
contribute all of their $n$ vertices each, while the $(1-\frac{1}{\log
  n})|D|$ ``good'' graphs can contribute no more than $2\log n$
vertices each. We proceed to bound $t$ from above and below:
\begin{align*}
  t &\leq k^{(1-\frac{1}{\log n})\cdot |D|}\cdot n^{\frac{1}{\log
      n}\cdot |D|} \leq (k \cdot n^{\frac{1}{\log n}})^{|D|} \leq (k
  \cdot 2)^{|D|} \leq (4 \log n)^{2^{c_2' \log^2 n}} = \left( 2^{2^{c_2'
        \log^2 n}} \right)^{\log \log n + 2}
\end{align*}
and
\begin{align*}
  t &\geq n^{\frac{1}{\log n}\cdot |D|} \geq 2^{2^{c_1 \log^2 n}}
\end{align*}
We proceed to express $N$ as a function of $t$. Using our bounds we have:
\begin{align*}
  &\log \log t = O(\log^2 n) \\
  &\log \log \log t = \Omega( \log \log n)
\end{align*}
and therefore we have:
\begin{align*}
  N = n^{|D|} \geq 2^{\log n \cdot 2^{c_1 \log^2 n}} \geq
  \left( t^{\frac{c}{\log \log n}} \right)^{\log n} \geq
  t^{c' \frac{\sqrt{\log \log t}}{\log \log \log t}}.
\end{align*}
\end{document}